knitr::opts_knit$set(root.dir = normalizePath("."))
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("phyloseq", "DESeq2"))
## Bioconductor version 3.10 (BiocManager 1.30.10), R 3.6.2 (2019-12-12)
## Installing package(s) 'phyloseq', 'DESeq2'
## Old packages: 'bit', 'caTools', 'insight', 'ModelMetrics', 'pROC', 'SQUAREM',
## 'tinytex'
inc.physeq <- readRDS("../data/not.rare.nounclass")
source("../code/functions.R")
The early response group in alfalfa samples compared to reference.
## Subset to early response
alf.early <- subset_samples(inc.physeq, Treatment_Response %in% c("Alfalfa_early", "Reference_early")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Be very careful of the design formula in the who_diff_day() function
# This function also selects only LFC >= 2 and alpha 0.01 for significant and increasing otus to be returned
log.plot.early.alf <- alf.early %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Alfalfa_early", "Reference_early", alf.early) %>%
log_plot("Alfalfa OTUS in early group that are significantly changing compared to reference early")
# Add colum to data indicating treatment
log.plot.early.alf.data <- log.plot.early.alf$data %>%
mutate(trt = c("Alfalfa_early"))
# print plot with viridis color
log.plot.early.alf + scale_colour_viridis_d(option = "plasma") +
theme_dark()
## Late
alf.late <- subset_samples(inc.physeq, Treatment_Response %in% c("Alfalfa_late", "Reference_late")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Make deseq and plot as above
log.plot.late.alf <- alf.late %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Alfalfa_late", "Reference_late", alf.late) %>%
log_plot("Alfalfa OTUS in late group that are significantly changing compared to reference late")
# Add colum to data indicating treatment
log.plot.late.alf.data <- log.plot.late.alf$data %>%
mutate(trt = c("Alfalfa_late"))
# print plot with viridis color
log.plot.late.alf + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Common to both early and late with LFC >=2
otustokeep <- intersect(log.plot.early.alf.data$OTU, log.plot.late.alf.data$OTU)
# Trim and rename variables
early_alf_OTUS <- log.plot.early.alf.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Alfalfa_early_log2FoldChange = log2FoldChange)
saveRDS(early_alf_OTUS, file = "../data/LFC>2_early_alf_OTUs.RDS")
late_alf_OTUS <- log.plot.late.alf.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Alfalfa_late_log2FoldChange = log2FoldChange)
saveRDS(late_alf_OTUS, file = "../data/LFC>2_late_alf_OTUs.RDS")
# join early and late
all_alf <- full_join(early_alf_OTUS, late_alf_OTUS)
This plot is showing the common OTUs with LFC > 4 while also showing the LFC of OTUs observed in only early or late, represented by the points landing below 4 on either axis. Common OTUs with LFC < 4 are left unlabeled.
p <- ggplot(all_alf,
aes(x = Alfalfa_early_log2FoldChange,
y = Alfalfa_late_log2FoldChange,
color = Phylum,
label = OTU)) +
geom_miss_point() +
geom_text_repel(aes(label=ifelse(Alfalfa_early_log2FoldChange>4 & Alfalfa_late_log2FoldChange>4,as.character(OTU),'')),hjust=1,vjust=1) +
theme(legend.position = "none")
p + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Save this list of alfalfa LFC > 2 for more plots
# Use all_alf
saveRDS(all_alf, file = "../data/LFC_alf_OTUs.RDS")
png("../Figures/early_late_alf_responders.png",height=5,width=8,units='in',res=300)
p + scale_colour_viridis_d(option = "viridis") +
theme_bw() +
theme(legend.position = "none")
dev.off()
## png
## 2
These OTUs had response greater than 2 in both early and late alfalfa
OTUs <- all_alf %>%
filter(Alfalfa_early_log2FoldChange > 2 & Alfalfa_late_log2FoldChange > 2)
write.table(OTUs, file = "../Figures/alf_responders.txt", sep = ",", quote = F, row.names = F)
table <- kable(OTUs) %>%
kable_styling(bootstrap_options = "striped") %>%
scroll_box(width = "100%", height = "400px")
BLASTn results for these two Otu00064 and Otu00494 were most similar to sequences from Pseudomonas fulva and Pseudomonas putida, both members of the Pseudomonas putida group
early <- alf.early %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local")
late <- alf.late %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local")
plotCounts(early, gene = c("Otu00064"), intgroup = c("Treatment_Response"))
plotCounts(early, gene = c("Otu00494"), intgroup = c("Treatment_Response"))
#Use above line to check if an OTU is increasing or decreasing depending on order of contrast, replace with OTU of interest, run the phyloseq to desey in #other areas to check compost or mix.
plotCounts(late, gene = "Otu00064", intgroup = c("Treatment_Response"))
plotCounts(late, gene = "Otu00494", intgroup = c("Treatment_Response"))
## Early
comp.early <- subset_samples(inc.physeq, Treatment_Response %in% c("Compost_early", "Reference_early")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Be very careful of the design formula in the who_diff_day() function
# This function also selects only LFC >= 2 and alpha 0.01 for significant and increasing otus to be returned
log.plot.early.comp <- comp.early %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Compost_early", "Reference_early", comp.early) %>%
log_plot("Compost OTUS in early group that are significantly changing compared to reference early")
# Save a data frame of these results
log.plot.early.comp.data <- log.plot.early.comp$data %>%
mutate(trt = c("Compost_early"))
# print plot with viridis color
log.plot.early.comp + scale_colour_viridis_d(option = "plasma") +
theme_dark()
## Late
comp.late <- subset_samples(inc.physeq, Treatment_Response %in% c("Compost_late", "Reference_late")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Make deseq and plot as above
log.plot.late.comp <- comp.late %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Compost_late", "Reference_late", comp.late) %>%
log_plot("Compost OTUS in late group that are significantly changing compared to reference late")
# Save a data frame of these results
log.plot.late.comp.data <- log.plot.late.comp$data %>%
mutate(trt = c("Compost_late"))
# print plot with viridis color
log.plot.late.comp + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Common to both early and late with LFC >=2
otustokeep <- intersect(log.plot.early.comp.data$OTU, log.plot.late.comp.data$OTU)
# Trim and rename variables
early_comp_OTUS <- log.plot.early.comp.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Compost_early_log2FoldChange = log2FoldChange)
saveRDS(early_comp_OTUS, file = "../data/LFC>2_early_comp_OTUs.RDS")
late_comp_OTUS <- log.plot.late.comp.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Compost_late_log2FoldChange = log2FoldChange)
saveRDS(late_comp_OTUS, file = "../data/LFC>2_late_comp_OTUs.RDS")
# join early and late
all_comp <- full_join(early_comp_OTUS, late_comp_OTUS)
This plot is showing the common OTUs with LFC > 4 while also showing the LFC of OTUs observed in only early or late, represented by the points landing below 4 on either axis. Common OTUs with LFC < 4 are left unlabeled.
p <- ggplot(all_comp,
aes(x = Compost_early_log2FoldChange,
y = Compost_late_log2FoldChange,
color = Phylum,
label = OTU)) +
geom_miss_point() +
geom_text_repel(aes(label=ifelse(Compost_early_log2FoldChange>4 & Compost_late_log2FoldChange>4,as.character(OTU),'')),hjust=0,vjust=0)
p + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Save this list of compcompa LFC > 2 for more plots
saveRDS(all_comp, file = "../data/LFC_comp_OTUs.RDS")
These OTUs had response greater than 4 in both early and late compost
OTUs <- all_comp %>%
filter(Compost_early_log2FoldChange>4 & Compost_late_log2FoldChange>4)
kable(OTUs) %>%
kable_styling(bootstrap_options = "striped") %>%
scroll_box(width = "100%", height = "400px")
| OTU | Phylum | Class | Order | Family | Genus | Compost_early_log2FoldChange | Compost_late_log2FoldChange |
|---|---|---|---|---|---|---|---|
| Otu00022 | Chloroflexi | Thermomicrobia | Sphaerobacterales | Sphaerobacteraceae | Sphaerobacter | 7.333830 | 6.397724 |
| Otu00103 | Proteobacteria | Proteobacteria_unclassified | Proteobacteria_unclassified | Proteobacteria_unclassified | Proteobacteria_unclassified | 5.133128 | 6.224653 |
| Otu00130 | Actinobacteria | Actinobacteria | Actinomycetales | Nocardiopsaceae | Thermobifida | 6.563901 | 5.247507 |
| Otu00139 | Proteobacteria | Deltaproteobacteria | Myxococcales | Myxococcales_unclassified | Myxococcales_unclassified | 6.125951 | 5.054827 |
| Otu00277 | Proteobacteria | Deltaproteobacteria | Myxococcales | Myxococcales_unclassified | Myxococcales_unclassified | 4.564746 | 4.587303 |
| Otu00281 | Proteobacteria | Gammaproteobacteria | Gammaproteobacteria_unclassified | Gammaproteobacteria_unclassified | Gammaproteobacteria_unclassified | 4.718963 | 4.305848 |
| Otu00331 | Verrucomicrobia | Opitutae | Opitutales | Opitutaceae | Opitutaceae_unclassified | 4.244084 | 4.339431 |
| Otu00378 | Chloroflexi | Chloroflexi_unclassified | Chloroflexi_unclassified | Chloroflexi_unclassified | Chloroflexi_unclassified | 6.595953 | 5.560310 |
| Otu00395 | Proteobacteria | Gammaproteobacteria | Alteromonadales | Alteromonadaceae | Haliea | 4.690868 | 4.920388 |
| Otu00655 | Proteobacteria | Deltaproteobacteria | Myxococcales | Myxococcales_unclassified | Myxococcales_unclassified | 6.766203 | 5.657540 |
| Otu00737 | Proteobacteria | Deltaproteobacteria | Myxococcales | Myxococcales_unclassified | Myxococcales_unclassified | 5.019151 | 4.032267 |
| Otu00847 | Planctomycetes | Planctomycetia | Planctomycetales | Planctomycetaceae | Planctomycetaceae_unclassified | 4.531744 | 4.556528 |
## Early
mix.early <- subset_samples(inc.physeq, Treatment_Response %in% c("Mix_early", "Reference_early")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Be very careful of the design formula in the who_diff_day() function
# This function also selects only LFC >= 2 and alpha 0.01 for significant and increasing otus to be returned
log.plot.early.mix <- mix.early %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Mix_early", "Reference_early", mix.early) %>%
log_plot("Mix OTUS in early group that are significantly changing mixared to reference early")
# Save a data frame of these results
log.plot.early.mix.data <- log.plot.early.mix$data %>%
mutate(trt = c("Mix_early"))
# print plot with viridis color
log.plot.early.mix + scale_colour_viridis_d(option = "plasma") +
theme_dark()
## Late
mix.late <- subset_samples(inc.physeq, Treatment_Response %in% c("Mix_late", "Reference_late")) %>%
filter_taxa(function(x) sum(x) >= 3, T)
# Make deseq and plot as above
log.plot.late.mix <- mix.late %>%
phyloseq_to_deseq2( ~ Treatment_Response) %>%
DESeq(test = "Wald", fitType = "local") %>%
who_diff_day("Mix_late", "Reference_late", mix.late) %>%
log_plot("Mix OTUS in late group that are significantly changing mixared to reference late")
# Save a data frame of these results
log.plot.late.mix.data <- log.plot.late.mix$data %>%
mutate(trt = c("Mix_late"))
# print plot with viridis color
log.plot.late.mix + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Common to both early and late with LFC >=2
otustokeep <- intersect(log.plot.early.mix.data$OTU, log.plot.late.mix.data$OTU)
early_mix_OTUS <- log.plot.early.mix.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Mix_early_log2FoldChange = log2FoldChange)
saveRDS(early_mix_OTUS, file = "../data/LFC>2_early_mix_OTUs.RDS")
late_mix_OTUS <- log.plot.late.mix.data %>%
filter(log2FoldChange >= 2) %>%
select(OTU, Phylum, Class, Order, Family, Genus, Mix_late_log2FoldChange = log2FoldChange)
saveRDS(late_mix_OTUS, file = "../data/LFC>2_late_mix_OTUs.RDS")
# join early and late
all_mix <- full_join(early_mix_OTUS, late_mix_OTUS)
This plot is showing the common OTUs with LFC > 4 while also showing the LFC of OTUs observed in only early or late, represented by the points landing below 4 on either axis. Common OTUs with LFC < 4 are left unlabeled.
p <- ggplot(all_mix,
aes(x = Mix_early_log2FoldChange,
y = Mix_late_log2FoldChange,
color = Phylum,
label = OTU)) +
geom_miss_point() +
geom_text_repel(aes(label=ifelse(Mix_early_log2FoldChange>4 & Mix_late_log2FoldChange>4,as.character(OTU),'')),hjust=0,vjust=0)
p
p + scale_colour_viridis_d(option = "plasma") +
theme_dark()
# Save this list of mixmixa LFC > 2 for more plots
saveRDS(all_mix, file = "../data/LFC_mix_OTUs.RDS")
These OTUs had response greater than 4 in both early and late mix
OTUs <- all_mix %>%
filter(Mix_early_log2FoldChange>4 & Mix_late_log2FoldChange>4)
kable(OTUs) %>%
kable_styling(bootstrap_options = "striped") %>%
scroll_box(width = "100%", height = "400px")
| OTU | Phylum | Class | Order | Family | Genus | Mix_early_log2FoldChange | Mix_late_log2FoldChange |
|---|---|---|---|---|---|---|---|
| Otu00022 | Chloroflexi | Thermomicrobia | Sphaerobacterales | Sphaerobacteraceae | Sphaerobacter | 5.603070 | 4.400343 |
| Otu00103 | Proteobacteria | Proteobacteria_unclassified | Proteobacteria_unclassified | Proteobacteria_unclassified | Proteobacteria_unclassified | 4.425215 | 4.625760 |
| Otu00130 | Actinobacteria | Actinobacteria | Actinomycetales | Nocardiopsaceae | Thermobifida | 4.795846 | 4.141254 |
| Otu00808 | Firmicutes | Bacilli | Bacillales | Paenibacillaceae_1 | Paenibacillus | 4.657377 | 4.580972 |
| Otu00847 | Planctomycetes | Planctomycetia | Planctomycetales | Planctomycetaceae | Planctomycetaceae_unclassified | 4.158366 | 4.386717 |
# We have lists of LFC OTUs, need to create a function that takes this and a target response group and returns the average relative abundance for these
# input: list of OTUs, Treatment group
resp_alf <- readRDS("../data/LFC_alf_OTUs.RDS") %>% dplyr::rename(label = OTU)
OTUs <- as.character(resp_alf$label)
alf.rela.early <- RelaOTUs(inc.physeq, c("Alfalfa_early"), OTUs) %>%
select(label = OTU, Alfalfa_early_meanRela = mean)
alf.rela.late <- RelaOTUs(inc.physeq, c("Alfalfa_late"), OTUs) %>%
select(label = OTU, meanAlfalfa_late_meanRela = mean)
all_alf_rela <- full_join(resp_alf, alf.rela.early) %>%
full_join(alf.rela.late)
## Joining, by = "label"Joining, by = "label"